/**
 * Outputs a representation of the IR as a control flow graph.
 *
 * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small
 * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. For most
 * uses, however, it is better to write a query that imports `PrintIR.qll`, extends
 * `PrintIRConfiguration`, and overrides `shouldPrintDeclaration()` to select a subset of declarations
 * to dump.
 *
 * Anatomy of a printed IR instruction
 *
 * An instruction:
 *
 * ```
 * # 2281|     v2281_19(void) = Call[~String]     : func:r2281_18, this:r2281_17
 * ```
 *
 * The prefix `# 2281|` specifies that this instruction was generated by the C++ source code on line 2281.
 * Scrolling up in the printed output, one will eventually find the name of the file to which the line
 * belongs.
 *
 * `v2281_19(void)` is the result of the instruction. Here, `v` means this is a void result or operand (so
 * there should be no later uses of the result; see below for other possible values). The `2281_19` is a
 * unique ID for the result. This is usually just the line number plus a small integer suffix to make it
 * unique within the function. The type of the result is `void`. In this case, it is `void`, because
 * `~String` returns `void`. The type of the result is usually just the name of the appropriate C++ type,
 * but it will sometimes be a type like `glval<int>`, which means result holds a glvalue, which at the
 * IR level works like a pointer. In other words, in the source code the type was `int`, but it is really
 * more like an `int*`. We see this, for example, in `x = y;`, where `x` is a glvalue.
 *
 * `Call` is the opcode of the instruction. Common opcodes include:
 *
 * * Arithmetic operations: `Add`, `Sub`, `Mul`, etc.
 * * Memory access operations: `Load`, `Store`.
 * * Function calls: `Call`.
 * * Literals: `Constant`.
 * * Variable addresses: `VariableAddress`.
 * * Function entry points: `EnterFunction`.
 * * Return from a function: `Return`, `ReturnVoid`. Note that the value being returned is set separately by a
 *   `Store` to a special `#return` variable.
 * * Stack unwinding for C++ function that throw and where the exception escapes the function: `Unwind`.
 * * Common exit point for `Unwind` and `Return`: `ExitFunction`.
 * * SSA-related opcodes: `Phi`, `Chi`.
 *
 * `[~String]` denotes additional information. The information might be present earlier in the IR, as is the case
 * for `Call`, where it is the name of the called function. This is also the case for `Load` and `Store`, where it
 * is the name of the variable that loaded or stored (if known). In the case of `Constant`, `FieldAddress`, and
 * `VariableAddress`, the information between brackets does not occur earlier.
 *
 * `func:r2281_18` and `this:r28281_17` are the operands of the instruction. The `func:` prefix denotes the operand
 * that holds the address of the called function. The `this:` prefix denotes the argument to the special `this`
 * parameter of an instance member function. `r2281_18`, `r2281_17` are the unique IDs of the operands. Each of these
 * matches the ID of a previously seen result, showing where that value came from. The `r` means that these are
 * "register" operands (see below).
 *
 * Result and operand kinds:
 *
 * Every result and operand is one of these three kinds:
 *
 * * `r` "register". These operands are not stored in any particular memory location. We can think of them as
 *   temporary values created during the evaluation of an expression. A register operand almost always has one
 *   use, often in the same block as its definition.
 * *  `m` "memory". These operands represents accesses to a specific memory location. The location could be a
 *    local variable, a global variable, a field of an object, an element of an array, or any memory that we happen
 *    to have a pointer to. These only occur as the result of a `Store`, the source operand of a `Load` or on the
 *    SSA instructions (`Phi`, `Chi`).
 * * `v` "void". Really just a register operand, but we mark register operands of type void with this special prefix
 *   so we know that there is no actual value there.
 *
 * Branches in the IR:
 *
 * The IR is divided into basic blocks. At the end of each block, there are one or more edges showing the possible
 * control flow successors of the block.
 *
 * ```
 * #   44|     v44_3(void)    = ConditionalBranch : r44_2
 * #-----|   False -> Block 4
 * #-----|   True -> Block 3
 * ```
 * Here we have a block that ends with a conditional branch. The two edges show where the control flows to depending
 * on whether the condition is true or false.
 *
 * SSA instructions:
 *
 * We use `Phi` instructions in SSA to create a single definition for a variable that might be assigned on multiple
 * control flow paths. The `Phi` instruction merges the potential values of that variable from each predecessor edge,
 * and the resulting definition is then used wherever that variable is accessed later on.
 *
 * When dealing with aliased memory, we use the `Chi` instruction to create a single definition for memory that might
 * or might not have been updated by a store, depending on the actual address that was written to. For example, take:
 *
 * ```cpp
 * int x = 5;
 * int y = 7;
 * int* p = condition ? &x : &y;
 * *p = 6;
 * return x;
 * ```
 *
 * At the point where we store to `*p`, we do not know whether `p` points to `x` or `y`. Thus, we do not know whether
 * `return x;` is going to return the value that `x` was originally initialized to (5), or whether it will return 6,
 *  because it was overwritten by `*p = 6;`. We insert a `Chi` instruction immediately after the store to `*p`:
 *
 * ```
 * r2(int)     = Constant[6]
 * r3(int*)    = <<value of p>>
 * m4(int)     = Store           : &r3, r2  // Stores the constant 6 to *p
 * m5(unknown) = Chi             : total:m1, partial:m4
 * ```
 * The `partial:` operand represents the memory that was just stored. The `total:` operand represents the previous
 * contents of all of the memory that `p` might have pointed to (in this case, both `x` and `y`). The result of the
 * `Chi` represents the new contents of whatever memory the `total:` operand referred to. We usually do not know exactly
 * which parts of that memory were overwritten, but it does model that any of that memory could have been modified, so
 * that later instructions do not assume that the memory was unchanged.
 */

private import internal.IRInternal
private import IR
private import internal.PrintIRImports as Imports
import Imports::IRConfiguration

private newtype TPrintIRConfiguration = MkPrintIRConfiguration()

/**
 * The query can extend this class to control which declarations are printed.
 */
class PrintIRConfiguration extends TPrintIRConfiguration {
  /** Gets a textual representation of this configuration. */
  string toString() { result = "PrintIRConfiguration" }

  /**
   * Holds if the IR for `func` should be printed. By default, holds for all
   * functions, global and namespace variables, and static local variables.
   */
  predicate shouldPrintDeclaration(Language::Declaration decl) { any() }
}

/**
 * Override of `IRConfiguration` to only evaluate debug strings for the functions that are to be dumped.
 */
private class FilteredIRConfiguration extends IRConfiguration {
  override predicate shouldEvaluateDebugStringsForFunction(Language::Declaration func) {
    shouldPrintDeclaration(func)
  }
}

private predicate shouldPrintDeclaration(Language::Declaration decl) {
  exists(PrintIRConfiguration config | config.shouldPrintDeclaration(decl))
}

private predicate shouldPrintInstruction(Instruction i) {
  exists(IRPropertyProvider provider | provider.shouldPrintInstruction(i))
}

private predicate shouldPrintOperand(Operand operand) {
  exists(IRPropertyProvider provider | provider.shouldPrintOperand(operand))
}

private string getAdditionalInstructionProperty(Instruction instr, string key) {
  exists(IRPropertyProvider provider | result = provider.getInstructionProperty(instr, key))
}

private string getAdditionalBlockProperty(IRBlock block, string key) {
  exists(IRPropertyProvider provider | result = provider.getBlockProperty(block, key))
}

/**
 * Gets the properties of an operand from any active property providers.
 */
private string getAdditionalOperandProperty(Operand operand, string key) {
  exists(IRPropertyProvider provider | result = provider.getOperandProperty(operand, key))
}

/**
 * Gets a string listing the properties of the operand and their corresponding values. If the
 * operand has no properties, this predicate has no result.
 */
private string getOperandPropertyListString(Operand operand) {
  result =
    strictconcat(string key, string value |
      value = getAdditionalOperandProperty(operand, key)
    |
      key + ":" + value, ", "
    )
}

/**
 * Gets a string listing the properties of the operand and their corresponding values. The list is
 * surrounded by curly braces. If the operand has no properties, this predicate returns an empty
 * string.
 */
private string getOperandPropertyString(Operand operand) {
  result = "{" + getOperandPropertyListString(operand) + "}"
  or
  not exists(getOperandPropertyListString(operand)) and result = ""
}

private newtype TPrintableIRNode =
  TPrintableIRFunction(IRFunction irFunc) { shouldPrintDeclaration(irFunc.getFunction()) } or
  TPrintableIRBlock(IRBlock block) { shouldPrintDeclaration(block.getEnclosingFunction()) } or
  TPrintableInstruction(Instruction instr) {
    shouldPrintInstruction(instr) and shouldPrintDeclaration(instr.getEnclosingFunction())
  }

/**
 * A node to be emitted in the IR graph.
 */
abstract private class PrintableIRNode extends TPrintableIRNode {
  abstract string toString();

  /**
   * Gets the location to be emitted for the node.
   */
  abstract Language::Location getLocation();

  /**
   * Gets the label to be emitted for the node.
   */
  abstract string getLabel();

  /**
   * Gets the order in which the node appears in its parent node.
   */
  abstract int getOrder();

  /**
   * Gets the parent of this node.
   */
  abstract PrintableIRNode getParent();

  /**
   * Gets the kind of graph represented by this node ("graph" or "tree").
   */
  string getGraphKind() { none() }

  /**
   * Holds if this node should always be rendered as text, even in a graphical
   * viewer.
   */
  predicate forceText() { none() }

  /**
   * Gets the value of the node property with the specified key.
   */
  string getProperty(string key) {
    key = "semmle.label" and result = this.getLabel()
    or
    key = "semmle.order" and result = this.getOrder().toString()
    or
    key = "semmle.graphKind" and result = this.getGraphKind()
    or
    key = "semmle.forceText" and this.forceText() and result = "true"
  }
}

/**
 * An IR graph node representing a `IRFunction` object.
 */
private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction {
  IRFunction irFunc;

  PrintableIRFunction() { this = TPrintableIRFunction(irFunc) }

  override string toString() { result = irFunc.toString() }

  override Language::Location getLocation() { result = irFunc.getLocation() }

  override string getLabel() {
    result = Imports::LanguageDebug::getIdentityString(irFunc.getFunction())
  }

  override int getOrder() {
    this =
      rank[result + 1](PrintableIRFunction orderedFunc, Language::Location location |
        location = orderedFunc.getIRFunction().getLocation()
      |
        orderedFunc
        order by
          location.getFile().getAbsolutePath(), location.getStartLine(), location.getStartColumn(),
          orderedFunc.getLabel()
      )
  }

  final override PrintableIRNode getParent() { none() }

  final IRFunction getIRFunction() { result = irFunc }
}

/**
 * An IR graph node representing an `IRBlock` object.
 */
private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock {
  IRBlock block;

  PrintableIRBlock() { this = TPrintableIRBlock(block) }

  override string toString() { result = this.getLabel() }

  override Language::Location getLocation() { result = block.getLocation() }

  override string getLabel() { result = "Block " + block.getDisplayIndex().toString() }

  override int getOrder() { result = block.getDisplayIndex() }

  final override string getGraphKind() { result = "tree" }

  final override predicate forceText() { any() }

  final override PrintableIRFunction getParent() {
    result.getIRFunction() = block.getEnclosingIRFunction()
  }

  override string getProperty(string key) {
    result = PrintableIRNode.super.getProperty(key) or
    result = getAdditionalBlockProperty(block, key)
  }

  final IRBlock getBlock() { result = block }
}

/**
 * An IR graph node representing an `Instruction`.
 */
private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction {
  Instruction instr;

  PrintableInstruction() { this = TPrintableInstruction(instr) }

  override string toString() { result = instr.toString() }

  override Language::Location getLocation() { result = instr.getLocation() }

  override string getLabel() {
    exists(IRBlock block |
      instr = block.getAnInstruction() and
      exists(
        string resultString, string operationString, string operandsString, int resultWidth,
        int operationWidth
      |
        resultString = instr.getResultString() and
        operationString = instr.getOperationString() and
        operandsString = this.getOperandsString() and
        columnWidths(block, resultWidth, operationWidth) and
        result =
          resultString + getPaddingString(resultWidth - resultString.length()) + " = " +
            operationString + getPaddingString(operationWidth - operationString.length()) + " : " +
            operandsString
      )
    )
  }

  override int getOrder() { result = instr.getDisplayIndexInBlock() }

  final override PrintableIRBlock getParent() { result.getBlock() = instr.getBlock() }

  final Instruction getInstruction() { result = instr }

  override string getProperty(string key) {
    result = PrintableIRNode.super.getProperty(key) or
    result = getAdditionalInstructionProperty(instr, key)
  }

  /**
   * Gets the string representation of the operand list. This is the same as
   * `Instruction::getOperandsString()`, except that each operand is annotated with any properties
   * provided by active `IRPropertyProvider` instances.
   */
  private string getOperandsString() {
    result =
      concat(Operand operand |
        operand = instr.getAnOperand() and
        shouldPrintOperand(operand)
      |
        operand.getDumpString() + getOperandPropertyString(operand), ", "
        order by
          operand.getDumpSortOrder()
      )
  }
}

private predicate columnWidths(IRBlock block, int resultWidth, int operationWidth) {
  resultWidth = max(Instruction instr | instr.getBlock() = block | instr.getResultString().length()) and
  operationWidth =
    max(Instruction instr | instr.getBlock() = block | instr.getOperationString().length())
}

private int maxColumnWidth() {
  result =
    max(Instruction instr, int width |
      width = instr.getResultString().length() or
      width = instr.getOperationString().length() or
      width = instr.getOperandsString().length()
    |
      width
    )
}

private string getPaddingString(int n) {
  n = 0 and result = ""
  or
  n > 0 and n <= maxColumnWidth() and result = getPaddingString(n - 1) + " "
}

/**
 * Holds if `node` belongs to the output graph, and its property `key` has the given `value`.
 */
query predicate nodes(PrintableIRNode node, string key, string value) {
  value = node.getProperty(key)
}

private int getSuccessorIndex(IRBlock pred, IRBlock succ) {
  succ =
    rank[result + 1](IRBlock aSucc, EdgeKind kind |
      aSucc = pred.getSuccessor(kind)
    |
      aSucc order by kind.toString()
    )
}

/**
 * Holds if the output graph contains an edge from `pred` to `succ`, and that edge's property `key`
 * has the given `value`.
 */
query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) {
  exists(IRBlock predBlock, IRBlock succBlock |
    predBlock = pred.getBlock() and
    succBlock = succ.getBlock() and
    (
      key = "semmle.label" and
      exists(string kinds |
        kinds =
          strictconcat(EdgeKind k |
            predBlock.getSuccessor(k) = succBlock
          |
            k.toString(), "|" order by k.toString()
          )
      |
        if predBlock.getBackEdgeSuccessor(_) = succBlock
        then value = kinds + " (back edge)"
        else value = kinds
      )
      or
      key = "semmle.order" and
      value = getSuccessorIndex(predBlock, succBlock).toString()
    )
  )
}

/**
 * Holds if `parent` is the parent node of `child` in the output graph.
 */
query predicate parents(PrintableIRNode child, PrintableIRNode parent) {
  parent = child.getParent()
}
